******************************************************************************
* Appendix D: Persistence Rates
*
* Created: 2-19-23
* Modified: 6-18-24
*
******************************************************************************

* Start from an empty session: drop the data in memory, then Mata objects
* and stored matrices
clear
clear mata
clear matrix

* Allow up to 10,000 variables in memory
set maxvar 10000


******************************************************************************
*** Percentage of missingness across relevant groups (same as in Chapter 3)
******************************************************************************

*** Build the list of cmp-year-survey keys that make up the analysis sample
use "elementsText\data\analyticalSample.dta", clear

keep cmp year survey

* The list is only used as a filter, so keep one row per key. Duplicate keys
* would otherwise break the m:1 merge below.
duplicates drop

sort cmp year survey
tempfile analysis
save `analysis', replace

*** Restrict the Party Brands dataset to the observations in the analysis sample
use "Data\Clean Data\MASTER.dta", clear

* Number of observations before the restriction
di _N

* Many master rows may share one key; each key appears once in `analysis'.
* _merge codes: 1 = only in MASTER, 2 = only in the key list, 3 = in both.
merge m:1 cmp year survey using `analysis'
tab _merge

* NOTE(review): the original kept _merge == 1, i.e. the rows that are NOT in
* the analysis sample, which contradicts the stated purpose of this step.
* Matched rows (_merge == 3) are kept instead -- confirm against Table 4.
keep if _merge == 3
drop _merge

********************************************************************************
*** Generate some variables to measure missingness across categories
********************************************************************************

* Age in five groups: up to 30, 31-40, 41-50, 51-64, 65 and over
generate age5 = age
recode age5 (65/max = 5) (51/64 = 4) (41/50 = 3) (31/40 = 2) (min/30 = 1)

* Left-right self-placement given: 1 for answers 0-99, 0 for codes 987-999,
* missing for codes 666, 977 and 986
generate placeself = lrs
recode placeself (987/999 = 0) (666 977 986 = .) (0/99 = 1)

* Partisanship
* pid codes 77777 and 88888 are the ones flagged by nonpartisan below; the
* codes ., 99995-99999 leave the indicators missing unless pid equals cmp.

* In-partisan: pid equals the party being evaluated (cmp)
generate inpartisan = cond(pid == cmp, 1, ///
	cond(!inlist(pid, ., 99995, 99996, 99997, 99998, 99999), 0, .))

* Out-partisan: a pid that differs from cmp and is none of the special codes
generate outpartisan = ///
	cond(pid != cmp & !inlist(pid, ., 77777, 88888, 99995, 99996, 99997, 99998, 99999), 1, ///
	cond(pid == cmp | inlist(pid, 77777, 88888), 0, .))

* Non-partisan: pid is 77777 or 88888
generate nonpartisan = cond(inlist(pid, 77777, 88888), 1, ///
	cond(pid == cmp | !inlist(pid, ., 99995, 99996, 99997, 99998, 99999), 0, .))

* Vote choice
* 1 when the vote variable equals cmp, 0 for any other value that is not
* . or .a, and missing otherwise
generate vote = cond(voteint == cmp, 1, cond(!inlist(voteint, ., .a), 0, .))

generate vote_tm1 = cond(lastvote == cmp, 1, cond(!inlist(lastvote, ., .a), 0, .))

* Missingness
* Value labels for the party-placement variable once it has been collapsed
label define lrparty ///
	1   "Not missing" ///
	666 "No CMP code" ///
	977 "Not applicable" ///
	988 "Don't know L-R" ///
	989 "Haven't heard of party" ///
	998 "Refused/No answer" ///
	999 "Don't know"
label values lrparty lrparty

* Collapse placements 0-99 into the single code 1 and set code 987 to missing;
* every other code is left unchanged
recode lrparty (987 = .) (0/99 = 1)

* miss: 0 when a placement was given (code 1), 1 for codes 988-999,
* missing for codes 666, 977 and 987
generate miss = lrparty
recode miss (988/999 = 1) (666 977 987 = .) (1 = 0)

*** Table D1: Average Percentage of Missingness Across Relevant Groups (same as Table 4 in the manuscript)
* Summarize miss separately within each category of every grouping variable
foreach grp in age5 education income female inpartisan outpartisan nonpartisan vote_tm1 placeself {
	bysort `grp': summarize miss
}

*** Figure D1: Coefficient plot for a linear probability model predicting missing party placements
reg miss age education income female inpartisan outpartisan vote_tm1 placeself
coefplot, drop(_cons placeself) xline(0) rename(age = "Age" education = "Education" income = "Income Quartiles" female = "Female" inpartisan = "In-Partisan" outpartisan = "Out-Partisan" vote_tm1 = "Vote_{t-1}" placeself = "Self Placement") title("Missingness")

* Substantive effects
* Re-estimated quietly so this part can be run on its own
qui reg miss age education income female inpartisan outpartisan vote_tm1 placeself

* Coefficient vector, kept available as matrix b
mat b = e(b)

* Coefficients are read by name (_b[var]) rather than by column position in b,
* so each label stays attached to the right regressor even if the regressor
* list above is reordered or extended.
* NOTE(review): the multipliers 17.3 (age) and 2 (education, income) are not
* explained in this file -- presumably the size of the change whose effect is
* reported; confirm and document their source.
di "Age effect = " _b[age]*17.3
di "Education effect = " _b[education]*2
di "Income effect = " _b[income]*2
di "Female effect = " _b[female]
di "Inpartisan effect = " _b[inpartisan]
di "outpartisan effect = " _b[outpartisan]
di "Previous vote effect = " _b[vote_tm1]
di "Place Self effect = " _b[placeself]



******************************************************************************
*** Persistence rates for different categories of respondents (UK Internet wave panel)
******************************************************************************
use "uk_panel.dta", clear

* Each comparison below follows the same steps: interact a 0/1 group indicator
* with lr_tm1, regress lr on lr_tm1, the interaction and the indicator, report
* the sum of the lr_tm1 and interaction coefficients (lincom), and test
* whether the interaction coefficient is zero.

*** Socioeconomic status
* Age: Y is 1 for ages up to 30 and 0 for ages 31 and above
generate Y = age
recode Y (31/max = 0) (min/30 = 1)
generate Y_lr_tm1 = lr_tm1 * Y
regress lr lr_tm1 Y_lr_tm1 Y
lincom lr_tm1 + Y_lr_tm1
test Y_lr_tm1 = 0

* Female
generate F_lr_tm1 = lr_tm1 * female
regress lr lr_tm1 F_lr_tm1 female
lincom lr_tm1 + F_lr_tm1
test F_lr_tm1 = 0

* Working class
generate WC_lr_tm1 = lr_tm1 * workingclass
regress lr lr_tm1 WC_lr_tm1 workingclass
lincom lr_tm1 + WC_lr_tm1
test WC_lr_tm1 = 0

* No college degree: NC swaps the 0 and 1 codes of college
generate NC = college
recode NC (1 = 0) (0 = 1)
generate NC_lr_tm1 = lr_tm1 * NC
regress lr lr_tm1 NC_lr_tm1 NC
lincom lr_tm1 + NC_lr_tm1
test NC_lr_tm1 = 0

* Income
* One indicator (I1, I2, ...) per income category, then interactions of the
* first three with lr_tm1
tabulate income, generate(I)
forvalues k = 1/3 {
	generate I`k'_lr_tm1 = lr_tm1 * I`k'
}

regress lr lr_tm1 I1_lr_tm1 I2_lr_tm1 I3_lr_tm1 I1 I2 I3

* For each of the three categories: sum of the lr_tm1 and interaction
* coefficients, followed by a test that the interaction coefficient is zero
forvalues k = 1/3 {
	lincom lr_tm1 + I`k'_lr_tm1
	test I`k'_lr_tm1 = 0

}


*** Political characteristics
* Each comparison interacts a 0/1 group indicator with lr_tm1, regresses lr on
* lr_tm1, the interaction and the indicator, reports the sum of the lr_tm1 and
* interaction coefficients, and tests whether the interaction is zero.

* Previous vote (non-voters): NV swaps the 0 and 1 codes of vote_tm1
generate NV = vote_tm1
recode NV (0 = 1) (1 = 0)
generate NV_lr_tm1 = lr_tm1 * NV
regress lr lr_tm1 NV_lr_tm1 NV
lincom lr_tm1 + NV_lr_tm1
test NV_lr_tm1 = 0


* Non-partisans
generate NP_lr_tm1 = lr_tm1 * nonpartisan
regress lr lr_tm1 NP_lr_tm1 nonpartisan
lincom lr_tm1 + NP_lr_tm1
test NP_lr_tm1 = 0


* Election interest (not at all, not very, and somewhat vs very):
* nointerest is 1 for codes 1-3 of elec_interest and 0 for code 4
generate nointerest = elec_interest
recode nointerest (4 = 0) (1/3 = 1)
generate NI_lr_tm1 = lr_tm1 * nointerest
regress lr lr_tm1 NI_lr_tm1 nointerest
lincom lr_tm1 + NI_lr_tm1
test NI_lr_tm1 = 0


* Political knowledge (low knowledge versus correct):
* noknow swaps the 0 and 1 codes of know
generate noknow = know
recode noknow (1 = 0) (0 = 1)
generate NO_lr_tm1 = lr_tm1 * noknow
regress lr lr_tm1 NO_lr_tm1 noknow
lincom lr_tm1 + NO_lr_tm1
test NO_lr_tm1 = 0

